In [1]:
import nltk
import random
import pandas as pd
import plotly.express as px
from nltk.corpus import movie_reviews
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from IPython.display import display

# === Download Required NLTK Data ===
nltk.download('movie_reviews')

# === Step 1: Load Movie Review Dataset ===
documents = [(movie_reviews.raw(fileid), category)
             for category in movie_reviews.categories()
             for fileid in movie_reviews.fileids(category)]
random.shuffle(documents)

texts, labels = zip(*documents)

# === Step 2: Train-Test Split ===
X_train, X_test, y_train, y_test = train_test_split(
    texts, labels, test_size=0.2, random_state=42)

# === Step 3: TF-IDF Vectorization ===
vectorizer = TfidfVectorizer(stop_words='english', max_features=3000)
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

# === Step 4: Train Logistic Regression Classifier ===
model = LogisticRegression(max_iter=1000)
model.fit(X_train_vec, y_train)

# === Step 5: Predictions & Evaluation ===
y_pred = model.predict(X_test_vec)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred, output_dict=True)
conf_matrix = confusion_matrix(y_test, y_pred)

print(f"✅ Accuracy: {accuracy:.2f}")

# === Step 6: Display Classification Report as Table ===
report_df = pd.DataFrame(report).transpose()
report_df.index.name = "Label"
print("📊 Classification Report:")
display(report_df.style.background_gradient(cmap='coolwarm').format("{:.2f}"))

# === Step 7: Confusion Matrix Table ===
labels_order = model.classes_
conf_df = pd.DataFrame(conf_matrix, index=[f"Actual: {x}" for x in labels_order],
                       columns=[f"Predicted: {x}" for x in labels_order])
print("🧠 Confusion Matrix Table:")
display(conf_df.style.background_gradient(cmap='Blues'))

# === Step 8: Plot Confusion Matrix Heatmap ===
fig = px.imshow(conf_matrix,
                x=labels_order, y=labels_order,
                text_auto=True,
                color_continuous_scale='blues',
                title="🧠 Confusion Matrix Heatmap")
fig.update_layout(xaxis_title="Predicted", yaxis_title="Actual")
fig.show()

# === Step 9: Custom Sentences Batch Testing ===
def predict_sentiment_batch(texts):
    vec = vectorizer.transform(texts)
    preds = model.predict(vec)
    return list(zip(texts, preds))

samples = [
    "This movie was a masterpiece, the acting was amazing.",
    "It was a boring movie, I almost fell asleep.",
    "Absolutely fantastic! Highly recommend.",
    "Worst plot ever. Terrible pacing.",
    "Mediocre, not bad but not good either."
]

predictions = predict_sentiment_batch(samples)
df_pred = pd.DataFrame(predictions, columns=["Text", "Predicted Sentiment"])
print("💬 Sample Predictions Table:")
display(df_pred.style.set_properties(**{'white-space': 'pre-wrap'}))
[nltk_data] Downloading package movie_reviews to
[nltk_data]     C:\Users\acer\AppData\Roaming\nltk_data...
[nltk_data]   Package movie_reviews is already up-to-date!
✅ Accuracy: 0.83
📊 Classification Report:
  precision recall f1-score support
Label        
neg 0.83 0.84 0.84 206.00
pos 0.83 0.82 0.83 194.00
accuracy 0.83 0.83 0.83 0.83
macro avg 0.83 0.83 0.83 400.00
weighted avg 0.83 0.83 0.83 400.00
🧠 Confusion Matrix Table:
  Predicted: neg Predicted: pos
Actual: neg 174 32
Actual: pos 35 159
💬 Sample Predictions Table:
  Text Predicted Sentiment
0 This movie was a masterpiece, the acting was amazing. pos
1 It was a boring movie, I almost fell asleep. neg
2 Absolutely fantastic! Highly recommend. pos
3 Worst plot ever. Terrible pacing. neg
4 Mediocre, not bad but not good either. neg
In [ ]: